WikiArt is an amazing resource containing centuries of artwork. Since such datasets are wonderful for deep learning, Kaggle has hosted a challenge to characterize the 'fingerprints' of various artists. The Kaggle dataset contains metadata and also a set of images that have been resized so that the shorter dimension is 256 pixels. Here, we will construct a CNN model to classify a set of portrait and landscape images.
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.python.client import device_lib
import matplotlib.pyplot as plt
import keras
from sklearn import metrics
import numpy as np
import pandas as pd
import os
from time import time
import shutil
import sys
from IPython.display import display, Image
from keras import layers
from keras import models
from keras import optimizers
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.python.eager import context
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras.applications.imagenet_utils import preprocess_input
from keras.callbacks import TensorBoard
from keras import backend as K
# When Keras runs on the TensorFlow backend, pin the image layout to
# channels-last so every layer below sees (height, width, channels) inputs.
if K.backend() == 'tensorflow':
    K.set_image_data_format('channels_last')
# Configure the matplotlib backend to plot inline in IPython
%matplotlib inline
class TrainValTensorBoard(TensorBoard):
    """TensorBoard callback that logs training and validation metrics separately.

    Training metrics are written by the parent ``TensorBoard`` callback into
    ``<log_dir>/training``; validation metrics are written by a dedicated
    writer into ``<log_dir>/validation`` with the ``val_`` prefix removed from
    their tags, so both runs share tag names and can be plotted on one chart.

    The original file declared this class twice with identical bodies; the
    two declarations are merged into this single definition.

    Args:
        log_dir: Root directory that receives the two sub-directories.
        **kwargs: Forwarded unchanged to ``TensorBoard.__init__``.
    """

    def __init__(self, log_dir='./logs', **kwargs):
        # The parent callback logs to the 'training' sub-directory.
        training_log_dir = os.path.join(log_dir, 'training')
        super(TrainValTensorBoard, self).__init__(training_log_dir, **kwargs)
        # Validation metrics go to a sibling sub-directory.
        self.val_log_dir = os.path.join(log_dir, 'validation')

    def set_model(self, model):
        """Create the validation writer, then let the parent attach to ``model``."""
        self.val_writer = tf.summary.FileWriter(self.val_log_dir)
        super(TrainValTensorBoard, self).set_model(model)

    def on_epoch_end(self, epoch, logs=None):
        """Write ``val_*`` entries with the validation writer; pass the rest on."""
        logs = logs or {}
        prefix = 'val_'
        # Strip only the leading prefix (str.replace would also remove any
        # later occurrence of 'val_' inside a metric name).
        val_logs = {k[len(prefix):]: v for k, v in logs.items() if k.startswith(prefix)}
        for name, value in val_logs.items():
            summary = tf.Summary()
            summary_value = summary.value.add()
            # float() accepts NumPy scalars and plain Python floats alike,
            # whereas ``value.item()`` only exists on NumPy values.
            summary_value.simple_value = float(value)
            summary_value.tag = name
            self.val_writer.add_summary(summary, epoch)
        self.val_writer.flush()

        # Only the non-validation entries are handed to the parent callback.
        train_logs = {k: v for k, v in logs.items() if not k.startswith(prefix)}
        super(TrainValTensorBoard, self).on_epoch_end(epoch, train_logs)

    def on_train_end(self, logs=None):
        """Finish the parent callback, then close the validation writer."""
        super(TrainValTensorBoard, self).on_train_end(logs)
        self.val_writer.close()
# special matplotlib command for global plot configuration
from matplotlib import rcParams
import matplotlib.cm as cm
import matplotlib as mpl
from matplotlib.colors import ListedColormap
from mpl_toolkits.mplot3d import Axes3D
# Palette of seven RGB triples, each component given as a float in [0, 1].
# NOTE(review): presumably the ColorBrewer "Dark2" scheme, judging by the name — confirm.
dark2_colors = [(0.10588235294117647, 0.6196078431372549, 0.4666666666666667),
(0.9058823529411765, 0.1607843137254902, 0.5411764705882353),
(0.8509803921568627, 0.37254901960784315, 0.00784313725490196),
(0.4588235294117647, 0.4392156862745098, 0.7019607843137254),
(0.4, 0.6509803921568628, 0.11764705882352941),
(0.9019607843137255, 0.6705882352941176, 0.00784313725490196),
(0.6509803921568628, 0.4627450980392157, 0.11372549019607843)]
# Three-colour colormap built from hex codes.
cmap_set1 = ListedColormap(['#e41a1c', '#377eb8', '#4daf4a'])
# Colormap wrapping the seven-colour palette defined above.
dark2_cmap=ListedColormap(dark2_colors)
def set_mpl_params():
    """Apply the notebook-wide matplotlib defaults.

    Sets figure size and resolution, line width, axes background, font size
    and family, and the patch colours (face colour is the first entry of the
    module-level ``dark2_colors`` palette).

    The original body also evaluated
    ``rcParams['axes.prop_cycle'].by_key()['color'][1]`` and discarded the
    result; that statement had no effect and has been removed.
    """
    rcParams['figure.figsize'] = (12, 6)
    rcParams['figure.dpi'] = 100
    rcParams['lines.linewidth'] = 2
    rcParams['axes.facecolor'] = 'white'
    rcParams['font.size'] = 14
    rcParams['patch.edgecolor'] = 'white'
    rcParams['patch.facecolor'] = dark2_colors[0]
    rcParams['font.family'] = 'StixGeneral'


set_mpl_params()
We create three new folders -- "new_train", "new_test", and "new_validation". The folder "new_train" contains two subfolders "train_portrait" and "train_landscape", the folder "new_test" contains two subfolders "test_portrait" and "test_landscape", and the folder "new_validation" contains two subfolders "validation_portrait" and "validation_landscape".
# Split the files in the "train" set into two folders, "train_portrait" and "train_landscape".
# NOTE(review): the folders are created in the current working directory, while
# the data generators further down read from './new_train' — confirm how the
# folders end up there.
# NOTE(review): this split reads the "FILE_ID" column, whereas the test and
# validation splits read "FIELD_ID" — confirm the actual CSV header.
train_table = pd.read_csv("./train/train.csv")  # pandas opens and closes the file itself
for category_name in ('portrait', 'landscape'):
    target_dir = 'train_' + category_name
    # exist_ok lets the cell be re-run without failing on existing folders.
    os.makedirs(target_dir, exist_ok=True)
    file_names = train_table.loc[train_table["CATEGORY_ID"] == category_name, "FILE_ID"].tolist()
    for fname in file_names:
        shutil.copyfile(os.path.join('./train', fname), os.path.join(target_dir, fname))
# Split the files in the "test" set into two folders, "test_portrait" and "test_landscape".
# NOTE(review): the folders are created in the current working directory, while
# the data generators further down read from './new_test' — confirm how the
# folders end up there.
# NOTE(review): this split reads the "FIELD_ID" column, whereas the train split
# reads "FILE_ID" — confirm the actual CSV header.
test_table = pd.read_csv("./test/test.csv")  # pandas opens and closes the file itself
for category_name in ('portrait', 'landscape'):
    target_dir = 'test_' + category_name
    # exist_ok lets the cell be re-run without failing on existing folders.
    os.makedirs(target_dir, exist_ok=True)
    file_names = test_table.loc[test_table["CATEGORY_ID"] == category_name, "FIELD_ID"].tolist()
    for fname in file_names:
        shutil.copyfile(os.path.join('./test', fname), os.path.join(target_dir, fname))
# Split the files in the "validation" set into two folders, "validation_portrait" and "validation_landscape".
# NOTE(review): the folders are created in the current working directory, while
# the data generators further down read from 'new_validation' — confirm how the
# folders end up there.
# NOTE(review): this split reads the "FIELD_ID" column, whereas the train split
# reads "FILE_ID" — confirm the actual CSV header.
validation_table = pd.read_csv("./validation/validation.csv")  # pandas opens and closes the file itself
for category_name in ('portrait', 'landscape'):
    target_dir = 'validation_' + category_name
    # exist_ok lets the cell be re-run without failing on existing folders.
    os.makedirs(target_dir, exist_ok=True)
    file_names = validation_table.loc[
        validation_table["CATEGORY_ID"] == category_name, "FIELD_ID"].tolist()
    for fname in file_names:
        shutil.copyfile(os.path.join('./validation', fname), os.path.join(target_dir, fname))
#os.mkdir('new_train')
#os.mkdir('new_test')
#os.mkdir('new_validation')
from keras.preprocessing.image import ImageDataGenerator

# One generator per split; each only rescales pixel values by 1/255.
train_datagen, validation_datagen, test_datagen = (
    ImageDataGenerator(rescale=1./255) for _ in range(3)
)

# Root folders that hold one sub-folder per class.
train_dir = './new_train'
validation_dir = 'new_validation'
test_dir = './new_test'

# Settings shared by all three directory iterators: 64x64 images, batches of
# 32 and a single binary label per image.
_flow_options = {
    'target_size': (64, 64),
    'batch_size': 32,
    'class_mode': 'binary',
}

train_generator = train_datagen.flow_from_directory(train_dir, **_flow_options)
validation_generator = validation_datagen.flow_from_directory(validation_dir, **_flow_options)
test_generator = test_datagen.flow_from_directory(test_dir, **_flow_options)
def plot_strip(data, labels, display_index):
    """Show the selected images side by side, each titled with its class.

    The strip has exactly one column per entry of ``display_index`` (the
    original always created ten columns, leaving empty framed axes when fewer
    indices were supplied). Nothing is drawn when ``display_index`` is empty.

    Args:
        data: Indexable batch of images; ``data[j, :, :]`` is passed to ``imshow``.
        labels: Indexable labels; ``int(labels[j])`` selects the title, with
            0 mapped to 'Landscape' and 1 to 'Portrait'.
        display_index: Sequence of batch positions to display.
    """
    # NOTE(review): this changes the default figure size for every later
    # figure as well, not just this strip — kept as in the original; confirm
    # that is intended.
    plt.rcParams['figure.figsize'] = (20.0, 20.0)
    if len(display_index) == 0:
        return
    # squeeze=False keeps ``axes`` two-dimensional even for a single column.
    _, axes = plt.subplots(nrows=1, ncols=len(display_index), squeeze=False)
    class_titles = ['Landscape', 'Portrait']
    for axis, j in zip(axes[0], display_index):
        axis.axis('off')
        axis.set_title(class_titles[int(labels[j])], loc='center')
        axis.imshow(data[j, :, :], cmap='gray')
# The function "index_collector" collects the indexes of the first five
# portraits and the first five landscapes among the leading labels of a batch.
def index_collector(labels, scan_limit=20, per_class=5):
    """Return positions of the first portraits followed by the first landscapes.

    Only the first ``scan_limit`` labels are inspected. A label equal to
    ``1.0`` counts as a portrait; any other value counts as a landscape.
    Batches shorter than ``scan_limit`` are handled (the original indexed
    positions 0..19 unconditionally and raised ``IndexError`` on them).

    Args:
        labels: Sequence of numeric class labels.
        scan_limit: How many leading labels to inspect (default 20).
        per_class: Maximum number of positions kept per class (default 5).

    Returns:
        A list with up to ``per_class`` portrait positions followed by up to
        ``per_class`` landscape positions, each group in ascending order.
    """
    portrait_index = []
    landscape_index = []
    for position, label in enumerate(labels[:scan_limit]):
        if label == 1.0:
            portrait_index.append(position)
        else:
            landscape_index.append(position)
    return portrait_index[:per_class] + landscape_index[:per_class]
# Draw a single batch from the training generator; ``train_data_batch`` is
# reused further down to derive the model input shape.
train_data_batch, train_labels_batch = next(iter(train_generator))
print('data batch shape:', train_data_batch.shape)
print('labels batch shape:', train_labels_batch.shape)

# Preview a handful of portraits and landscapes from that batch.
display_index_train = index_collector(train_labels_batch.tolist())
plot_strip(train_data_batch, train_labels_batch, display_index_train)
# Draw a single batch from the validation generator and report its shapes.
validation_data_batch, validation_labels_batch = next(iter(validation_generator))
print('data batch shape:', validation_data_batch.shape)
print('labels batch shape:', validation_labels_batch.shape)

# Preview a handful of portraits and landscapes from that batch.
display_index_validation = index_collector(validation_labels_batch.tolist())
plot_strip(validation_data_batch, validation_labels_batch, display_index_validation)
# Draw a single batch from the test generator and report its shapes.
test_data_batch, test_labels_batch = next(iter(test_generator))
print('data batch shape:', test_data_batch.shape)
print('labels batch shape:', test_labels_batch.shape)

# Preview a handful of portraits and landscapes from that batch.
display_index_test = index_collector(test_labels_batch.tolist())
plot_strip(test_data_batch, test_labels_batch, display_index_test)
Construct a baseline CNN classifier using Keras for the training set and assess the validation set performance at each epoch. The goal is to correctly classify portraits from landscapes. The resulting performance on the training and validation set will be plotted as a function of epoch using the criteria over which you are optimizing.
def Simple_CNN_Model(activation_function_1, activation_function_2, optimizer_type, epoch_number):
    """Build, train, evaluate and plot the baseline CNN.

    The network stacks four 3x3 convolution + 2x2 max-pooling pairs (32, 64,
    128 and 128 filters), a 512-unit dense layer and a single-unit output
    layer. It is trained on the module-level ``train_generator``, validated
    on ``validation_generator`` and scored on ``test_generator``; the input
    shape comes from the module-level ``train_data_batch``.

    Args:
        activation_function_1: Activation of the convolutional layers and the
            first dense layer.
        activation_function_2: Activation of the output layer.
        optimizer_type: Optimizer handed to ``model.compile``.
        epoch_number: Number of training epochs.

    Returns:
        None. The model summary, test metrics and two curves (accuracy and
        loss per epoch) are printed / plotted.
    """
    K.clear_session()

    model = models.Sequential(name='FiveLayerModel')
    for block_no, filters in enumerate((32, 64, 128, 128), start=1):
        # Only the first layer needs to be told the input shape.
        extra = {'input_shape': train_data_batch.shape[1:]} if block_no == 1 else {}
        model.add(layers.Conv2D(filters, (3, 3), padding='same', activation=activation_function_1,
                                name='conv%d' % block_no, **extra))
        model.add(layers.MaxPooling2D((2, 2), name='max_pool%d' % block_no))
    model.add(layers.Flatten())
    model.add(layers.Dense(512, kernel_initializer='glorot_uniform',
                           activation=activation_function_1, name='fc1'))
    model.add(layers.Dense(1, kernel_initializer='glorot_uniform',
                           activation=activation_function_2, name='fc2'))
    model.compile(loss='binary_crossentropy', optimizer=optimizer_type, metrics=['accuracy'])
    model.summary()

    # Fit the model
    history = model.fit_generator(
        train_generator,
        steps_per_epoch=100,
        epochs=epoch_number,
        validation_data=validation_generator,
        validation_steps=50,
        verbose=1,
        # callbacks=[TrainValTensorBoard("logs/{}".format(time()), write_graph=True)]
    )

    test_loss, test_acc = model.evaluate_generator(test_generator, steps=100)
    print('\ntest accuracy:', test_acc)
    print('test loss:', test_loss)

    curves = history.history
    # Older Keras releases log the metric as 'acc', newer ones as 'accuracy';
    # accept either instead of failing with KeyError.
    acc_key = 'acc' if 'acc' in curves else 'accuracy'
    acc = curves[acc_key]
    val_acc = curves['val_' + acc_key]
    loss = curves['loss']
    val_loss = curves['val_loss']
    epochs = range(len(acc))

    plt.plot(epochs, acc, 'bo', label='Training acc')
    plt.plot(epochs, val_acc, 'g-', label='Validation acc')
    plt.xlabel("Num of Epochs")
    plt.ylabel("Accuracy")
    plt.title('Training and validation accuracy')
    plt.legend()

    plt.figure()
    plt.plot(epochs, loss, 'bo', label='Training loss')
    plt.plot(epochs, val_loss, 'g-', label='Validation loss')
    plt.xlabel("Num of Epochs")
    plt.ylabel("Loss")
    plt.title('Training and validation loss')
    plt.legend()
    plt.show()


# NOTE(review): ``sgd`` is configured here, but the call below passes the
# string 'sgd', so this instance is not the object given to ``compile`` —
# confirm which optimizer configuration was intended.
sgd = optimizers.SGD(lr = 0.05, decay=1e-5, momentum=0.9, nesterov=True)
Simple_CNN_Model('relu', 'sigmoid', 'sgd', 60)
Comments
From the pattern of training and validation curves, describe what is good/bad and what you plan to do next to improve the result.
(A) The Good Aspect
I tried different combinations for "activation_function_1" and "activation_function_2", and several different types of optimizer in Problem 2. It turned out that the combination -- activation_function_1 = "relu" and activation_function_2 = "sigmoid" -- works the best among all combinations I have tried. Also, the "stochastic gradient descent" optimizer defined in Problem 2 works well.
(1) In problem 2, I tried 60 epochs in total. Both the training accuracy and validation accuracy increase with the number of epochs, while both the training loss and validation loss decrease with the number of epochs, although they bounce around a little bit.
(2) The decrease in training loss and the decrease in validation loss are apparent -- they dropped from a large initial value (around $68\%$) to a small final value (around $20\%$).
(3) Similarly, the increase in training accuracy and the increase in validation accuracy are apparent as well -- they rose from a small initial value (around $54\%$) to a large final value (around $90\%$).
(B) The Bad Aspect
(1) Both the training loss and the validation loss reached around $20\%$ after 47 epochs and stalled. Increasing the number of epochs beyond 47 does NOT decrease either the training loss or the validation loss.
(2) Both the training accuracy and the validation accuracy reached around $90\%$ after 24 epochs and stalled. Increasing the number of epochs beyond 24 does NOT improve either the training accuracy or the validation accuracy.
(C) Plan of Improving the Results
To improve the results, I will try different architectures, different parameters, and different optimizers/regularizations to obtain an optimal CNN. This is what I will do in Problem 4.
To find an optimal architecture for the CNN, different numbers of convolution-subsampling pairs, different numbers of feature maps, and different numbers of units for the dense layers will be tried here.
Here, the accuracy and loss of training, validation, and test sets obtained from different architectures will be used to choose the optimal architecture. The final values of accuracy and loss and how the accuracy and loss change with epochs will be compared to determine the optimal values.
# Here, we will try different values for the number of convolution-subsampling pairs: 1, 2, 3, 4, and 5.
def CNN_Architecture_Selector_1(activation_function_1, activation_function_2, optimizer_type, epoch_number):
    """Train and plot five CNNs that differ in the number of conv/pool pairs.

    Model ``n`` (n = 1..5) uses the first ``n`` of the filter counts
    32, 64, 128, 256, 512, each as a 3x3 convolution followed by 2x2 max
    pooling, then a 512-unit dense layer and a single-unit output layer.
    Every model is trained on the module-level ``train_generator``, validated
    on ``validation_generator`` and scored on ``test_generator``.

    Args:
        activation_function_1: Activation of the convolutional layers and the
            first dense layer.
        activation_function_2: Activation of the output layer.
        optimizer_type: Optimizer handed to ``model.compile``.
        epoch_number: Number of training epochs per model.

    Returns:
        None. Summaries, test metrics and curves are printed / plotted.
    """
    K.clear_session()
    filter_plan = (32, 64, 128, 256, 512)

    for depth in range(1, len(filter_plan) + 1):
        # Each model is built, trained and reported inside the iteration;
        # the original kept all of them in a list that was never read.
        model = models.Sequential()
        for block_no, filters in enumerate(filter_plan[:depth], start=1):
            # Only the first layer needs to be told the input shape.
            extra = {'input_shape': train_data_batch.shape[1:]} if block_no == 1 else {}
            model.add(layers.Conv2D(filters, (3, 3), padding='same',
                                    activation=activation_function_1,
                                    name='conv%d' % block_no, **extra))
            model.add(layers.MaxPooling2D((2, 2), name='max_pool%d' % block_no))
        model.add(layers.Flatten())
        model.add(layers.Dense(512, kernel_initializer='glorot_uniform',
                               activation=activation_function_1, name='fc1'))
        model.add(layers.Dense(1, kernel_initializer='glorot_uniform',
                               activation=activation_function_2, name='fc2'))
        model.compile(loss='binary_crossentropy', optimizer=optimizer_type, metrics=['accuracy'])
        model.summary()

        # Fit the model
        history = model.fit_generator(
            train_generator,
            steps_per_epoch=100,
            epochs=epoch_number,
            validation_data=validation_generator,
            validation_steps=50,
            verbose=1,
            # callbacks=[TrainValTensorBoard("logs/{}".format(time()), write_graph=True)]
        )

        test_loss, test_acc = model.evaluate_generator(test_generator, steps=100)
        print('\ntest accuracy:', test_acc)
        print('test loss:', test_loss)

        curves = history.history
        # Older Keras releases log the metric as 'acc', newer ones as
        # 'accuracy'; accept either instead of failing with KeyError.
        acc_key = 'acc' if 'acc' in curves else 'accuracy'
        acc = curves[acc_key]
        val_acc = curves['val_' + acc_key]
        loss = curves['loss']
        val_loss = curves['val_loss']
        epochs = range(len(acc))

        plt.plot(epochs, acc, 'bo', label='Training acc')
        plt.plot(epochs, val_acc, 'g-', label='Validation acc')
        plt.xlabel("Num of Epochs")
        plt.ylabel("Accuracy")
        plt.title('Training and validation accuracy')
        plt.legend()

        plt.figure()
        plt.plot(epochs, loss, 'bo', label='Training loss')
        plt.plot(epochs, val_loss, 'g-', label='Validation loss')
        plt.xlabel("Num of Epochs")
        plt.ylabel("Loss")
        plt.title('Training and validation loss')
        plt.legend()
        plt.show()


# NOTE(review): ``sgd`` is configured here, but the call below passes the
# string 'sgd', so this instance is not the object given to ``compile`` —
# confirm which optimizer configuration was intended.
sgd = optimizers.SGD(lr = 0.05, decay=1e-5, momentum=0.9, nesterov=True)
CNN_Architecture_Selector_1('relu', 'sigmoid', 'sgd', 40)
Here, we set the number of convolution-subsampling pairs as 2. We will try different values for the number of feature maps for the two convolutional layers. The pairs of feature-map counts for the first and second convolutional layers that we will try are: (16, 32), (32, 64), (48, 96), (64, 128), and (80, 160).
def CNN_Architecture_Selector_2(activation_function_1, activation_function_2, optimizer_type, epoch_number):
    """Train and plot five two-block CNNs that differ in feature-map counts.

    The (conv1, conv2) filter counts tried are (16, 32), (32, 64), (48, 96),
    (64, 128) and (80, 160); each model ends with a 512-unit dense layer and
    a single-unit output layer. Every model is trained on the module-level
    ``train_generator``, validated on ``validation_generator`` and scored on
    ``test_generator``.

    Args:
        activation_function_1: Activation of the convolutional layers and the
            first dense layer.
        activation_function_2: Activation of the output layer.
        optimizer_type: Optimizer handed to ``model.compile``.
        epoch_number: Number of training epochs per model.

    Returns:
        None. Summaries, test metrics and curves are printed / plotted.
    """
    K.clear_session()

    for step in range(1, 6):
        first_filters, second_filters = 16 * step, 32 * step
        # Each model is built, trained and reported inside the iteration;
        # the original kept all of them in a list that was never read.
        model = models.Sequential()
        model.add(layers.Conv2D(first_filters, (3, 3), padding='same',
                                activation=activation_function_1,
                                input_shape=train_data_batch.shape[1:], name='conv1'))
        model.add(layers.MaxPooling2D((2, 2), name='max_pool1'))
        model.add(layers.Conv2D(second_filters, (3, 3), padding='same',
                                activation=activation_function_1, name='conv2'))
        model.add(layers.MaxPooling2D((2, 2), name='max_pool2'))
        model.add(layers.Flatten())
        model.add(layers.Dense(512, kernel_initializer='glorot_uniform',
                               activation=activation_function_1, name='fc1'))
        model.add(layers.Dense(1, kernel_initializer='glorot_uniform',
                               activation=activation_function_2, name='fc2'))
        model.compile(loss='binary_crossentropy', optimizer=optimizer_type, metrics=['accuracy'])
        model.summary()

        # Fit the model
        history = model.fit_generator(
            train_generator,
            steps_per_epoch=100,
            epochs=epoch_number,
            validation_data=validation_generator,
            validation_steps=50,
            verbose=1,
            # callbacks=[TrainValTensorBoard("logs/{}".format(time()), write_graph=True)]
        )

        test_loss, test_acc = model.evaluate_generator(test_generator, steps=100)
        print('\ntest accuracy:', test_acc)
        print('test loss:', test_loss)

        curves = history.history
        # Older Keras releases log the metric as 'acc', newer ones as
        # 'accuracy'; accept either instead of failing with KeyError.
        acc_key = 'acc' if 'acc' in curves else 'accuracy'
        acc = curves[acc_key]
        val_acc = curves['val_' + acc_key]
        loss = curves['loss']
        val_loss = curves['val_loss']
        epochs = range(len(acc))

        plt.plot(epochs, acc, 'bo', label='Training acc')
        plt.plot(epochs, val_acc, 'g-', label='Validation acc')
        plt.xlabel("Num of Epochs")
        plt.ylabel("Accuracy")
        plt.title('Training and validation accuracy')
        plt.legend()

        plt.figure()
        plt.plot(epochs, loss, 'bo', label='Training loss')
        plt.plot(epochs, val_loss, 'g-', label='Validation loss')
        plt.xlabel("Num of Epochs")
        plt.ylabel("Loss")
        plt.title('Training and validation loss')
        plt.legend()
        plt.show()


# NOTE(review): ``sgd`` is configured here, but the call below passes the
# string 'sgd', so this instance is not the object given to ``compile`` —
# confirm which optimizer configuration was intended.
sgd = optimizers.SGD(lr = 0.05, decay=1e-5, momentum=0.9, nesterov=True)
CNN_Architecture_Selector_2('relu', 'sigmoid', 'sgd', 40)
Here, we set the number of convolution-subsampling pairs as 2, and use 16 maps in the first convolutional layer and 32 maps in the second convolutional layer. I will try different numbers of units for the dense layers: 16, 32, 64, 128, 256, 512, and 1024.
def CNN_Architecture_Selector_3(activation_function_1, activation_function_2, optimizer_type, epoch_number):
    """Train and plot seven two-block CNNs that differ in dense-layer width.

    All models use 16 and 32 feature maps in their two conv/pool blocks; the
    first dense layer has 16, 32, 64, 128, 256, 512 or 1024 units and is
    followed by a single-unit output layer. Every model is trained on the
    module-level ``train_generator``, validated on ``validation_generator``
    and scored on ``test_generator``.

    Args:
        activation_function_1: Activation of the convolutional layers and the
            first dense layer.
        activation_function_2: Activation of the output layer.
        optimizer_type: Optimizer handed to ``model.compile``.
        epoch_number: Number of training epochs per model.

    Returns:
        None. Summaries, test metrics and curves are printed / plotted.
    """
    K.clear_session()

    for exponent in range(4, 11):
        dense_units = 2 ** exponent  # 16, 32, ..., 1024
        # Each model is built, trained and reported inside the iteration;
        # the original kept all of them in a list that was never read.
        model = models.Sequential()
        model.add(layers.Conv2D(16, (3, 3), padding='same', activation=activation_function_1,
                                input_shape=train_data_batch.shape[1:], name='conv1'))
        model.add(layers.MaxPooling2D((2, 2), name='max_pool1'))
        model.add(layers.Conv2D(32, (3, 3), padding='same', activation=activation_function_1,
                                name='conv2'))
        model.add(layers.MaxPooling2D((2, 2), name='max_pool2'))
        model.add(layers.Flatten())
        model.add(layers.Dense(dense_units, kernel_initializer='glorot_uniform',
                               activation=activation_function_1, name='fc1'))
        model.add(layers.Dense(1, kernel_initializer='glorot_uniform',
                               activation=activation_function_2, name='fc2'))
        model.compile(loss='binary_crossentropy', optimizer=optimizer_type, metrics=['accuracy'])
        model.summary()

        # Fit the model
        history = model.fit_generator(
            train_generator,
            steps_per_epoch=100,
            epochs=epoch_number,
            validation_data=validation_generator,
            validation_steps=50,
            verbose=1,
            # callbacks=[TrainValTensorBoard("logs/{}".format(time()), write_graph=True)]
        )

        test_loss, test_acc = model.evaluate_generator(test_generator, steps=100)
        print('\ntest accuracy:', test_acc)
        print('test loss:', test_loss)

        curves = history.history
        # Older Keras releases log the metric as 'acc', newer ones as
        # 'accuracy'; accept either instead of failing with KeyError.
        acc_key = 'acc' if 'acc' in curves else 'accuracy'
        acc = curves[acc_key]
        val_acc = curves['val_' + acc_key]
        loss = curves['loss']
        val_loss = curves['val_loss']
        epochs = range(len(acc))

        plt.plot(epochs, acc, 'bo', label='Training acc')
        plt.plot(epochs, val_acc, 'g-', label='Validation acc')
        plt.xlabel("Num of Epochs")
        plt.ylabel("Accuracy")
        plt.title('Training and validation accuracy')
        plt.legend()

        plt.figure()
        plt.plot(epochs, loss, 'bo', label='Training loss')
        plt.plot(epochs, val_loss, 'g-', label='Validation loss')
        plt.xlabel("Num of Epochs")
        plt.ylabel("Loss")
        plt.title('Training and validation loss')
        plt.legend()
        plt.show()


# NOTE(review): ``sgd`` is configured here, but the call below passes the
# string 'sgd', so this instance is not the object given to ``compile`` —
# confirm which optimizer configuration was intended.
sgd = optimizers.SGD(lr = 0.05, decay=1e-5, momentum=0.9, nesterov=True)
CNN_Architecture_Selector_3('relu', 'sigmoid', 'sgd', 40)
(1) The number of convolution-subsampling pairs: Based on the accuracy and loss of training, validation, and test sets, we found that the optimal number of convolution-subsampling pairs is 2.
(2) The number of feature maps: It appears that 16 maps in the first convolutional layer and 32 maps in the second convolutional layer is the optimal choice. Increasing the number of feature maps improves the results only slightly, which is not worth the additional computational cost.
(3) The number of units for the dense layers: It appears that 16 units for the dense layers is the optimal choice. Increasing the number of units improves the results only slightly, which is not worth the additional computational cost.
In this part, I will try different optimizers, dropout, and regularization to construct an optimal CNN. The optimizers I will try include SGD, Adagrad, Adadelta, RMSprop, and Adam. Dropout with different rates (i.e., the fraction of the input units to drop) will be tried to reduce the overfitting as much as possible. And different regularizers will be used to apply penalties on layer parameters.
Here, the accuracy and loss of training, validation, and test sets obtained from different architectures will be used to choose the optimal architecture. The final values of accuracy and loss and how the accuracy and loss change with epochs will be compared to determine the optimal values.
Here, we set the number of convolution-subsampling pairs as 2, use 16 maps in the first convolutional layer and 32 maps in the second convolutional layer, and set the number of units for the dense layers as 16. Based on the observations, we find that using 20 epochs is enough to obtain an optimal result, therefore we will use 20 epochs in the following experiments.
Five different optimizers, including SGD, Adagrad, Adadelta, RMSprop, and Adam, are tried in this part.
def CNN_Optimizer_Selector(activation_function_1, activation_function_2):
    """Train and plot the selected CNN once per optimizer.

    The architecture is fixed (16 and 32 feature maps, a 16-unit dense layer,
    a single-unit output) and trained for 20 epochs with SGD, RMSprop,
    Adagrad, Adadelta and Adam in turn, using the module-level
    ``train_generator``, ``validation_generator`` and ``test_generator``.

    The original built the configured optimizer objects but then compiled
    each model with the optimizer's *name*, so the settings written here
    (notably the SGD learning rate, decay and Nesterov momentum) were never
    applied. The configured instances are now the ones passed to ``compile``.

    Args:
        activation_function_1: Activation of the convolutional layers and the
            first dense layer.
        activation_function_2: Activation of the output layer.

    Returns:
        None. Summaries, test metrics and curves are printed / plotted.
    """
    K.clear_session()

    # Built after clear_session() so the optimizers belong to the fresh
    # session (presumably required on graph-mode backends, where optimizer
    # state created before the reset would be stale).
    candidates = [
        ('sgd', optimizers.SGD(lr=0.05, decay=1e-5, momentum=0.9, nesterov=True)),
        ('RMSprop', optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)),
        ('Adagrad', optimizers.Adagrad(lr=0.01, epsilon=None, decay=0.0)),
        ('Adadelta', optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)),
        ('Adam', optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None,
                                 decay=0.0, amsgrad=False)),
    ]

    for optimizer_name, optimizer in candidates:
        model = models.Sequential()
        model.add(layers.Conv2D(16, (3, 3), padding='same', activation=activation_function_1,
                                input_shape=train_data_batch.shape[1:], name='conv1'))
        model.add(layers.MaxPooling2D((2, 2), name='max_pool1'))
        model.add(layers.Conv2D(32, (3, 3), padding='same', activation=activation_function_1,
                                name='conv2'))
        model.add(layers.MaxPooling2D((2, 2), name='max_pool2'))
        model.add(layers.Flatten())
        model.add(layers.Dense(16, kernel_initializer='glorot_uniform',
                               activation=activation_function_1, name='fc1'))
        model.add(layers.Dense(1, kernel_initializer='glorot_uniform',
                               activation=activation_function_2, name='fc2'))
        model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
        print('\ntest optimizer:', optimizer_name)
        model.summary()

        # Fit the model
        history = model.fit_generator(
            train_generator,
            steps_per_epoch=100,
            epochs=20,
            validation_data=validation_generator,
            validation_steps=50,
            verbose=1,
            # callbacks=[TrainValTensorBoard("logs/{}".format(time()), write_graph=True)]
        )

        test_loss, test_acc = model.evaluate_generator(test_generator, steps=100)
        print('\ntest accuracy:', test_acc)
        print('test loss:', test_loss)

        curves = history.history
        # Older Keras releases log the metric as 'acc', newer ones as
        # 'accuracy'; accept either instead of failing with KeyError.
        acc_key = 'acc' if 'acc' in curves else 'accuracy'
        acc = curves[acc_key]
        val_acc = curves['val_' + acc_key]
        loss = curves['loss']
        val_loss = curves['val_loss']
        epochs = range(len(acc))

        plt.plot(epochs, acc, 'bo', label='Training acc')
        plt.plot(epochs, val_acc, 'g-', label='Validation acc')
        plt.xlabel("Num of Epochs")
        plt.ylabel("Accuracy")
        plt.title('Training and validation accuracy')
        plt.legend()

        plt.figure()
        plt.plot(epochs, loss, 'bo', label='Training loss')
        plt.plot(epochs, val_loss, 'g-', label='Validation loss')
        plt.xlabel("Num of Epochs")
        plt.ylabel("Loss")
        plt.title('Training and validation loss')
        plt.legend()
        plt.show()


CNN_Optimizer_Selector('relu', 'sigmoid')
Five different rates, including 0, 0.2, 0.5, 0.8, and 1, are tried for the dropout in this part.
def CNN_Dropout_Selector(activation_function_1, activation_function_2):
    """Train and plot the selected CNN once per dropout rate.

    The architecture is fixed (16 and 32 feature maps, a 16-unit dense layer,
    a single-unit output) and trained for 20 epochs with Adadelta, with a
    dropout layer after the 16-unit dense layer at rates 0.0, 0.2, 0.5, 0.8
    and 1.0. Data come from the module-level ``train_generator``,
    ``validation_generator`` and ``test_generator``.

    The original also placed a dropout layer *after* the output layer, which
    randomly zeroes and rescales the predicted probability during training;
    that layer has been removed so dropout only regularises the hidden
    representation.

    Args:
        activation_function_1: Activation of the convolutional layers and the
            first dense layer.
        activation_function_2: Activation of the output layer.

    Returns:
        None. Test metrics and curves are printed / plotted.
    """
    K.clear_session()
    Adadelta = optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
    # NOTE(review): a rate of 1.0 would drop every unit; how the Dropout layer
    # treats that boundary value depends on the Keras version — confirm it is
    # a meaningful setting for this comparison.
    rates = [0.0, 0.2, 0.5, 0.8, 1.0]

    for rate in rates:
        model = models.Sequential()
        model.add(layers.Conv2D(16, (3, 3), padding='same', activation=activation_function_1,
                                input_shape=train_data_batch.shape[1:], name='conv1'))
        model.add(layers.MaxPooling2D((2, 2), name='max_pool1'))
        model.add(layers.Conv2D(32, (3, 3), padding='same', activation=activation_function_1,
                                name='conv2'))
        model.add(layers.MaxPooling2D((2, 2), name='max_pool2'))
        model.add(layers.Flatten())
        model.add(layers.Dense(16, kernel_initializer='glorot_uniform',
                               activation=activation_function_1, name='fc1'))
        model.add(layers.Dropout(rate, noise_shape=None, seed=None))
        model.add(layers.Dense(1, kernel_initializer='glorot_uniform',
                               activation=activation_function_2, name='fc2'))
        model.compile(loss='binary_crossentropy', optimizer=Adadelta, metrics=['accuracy'])
        print('\n Dropout Rate:', rate)

        # Fit the model
        history = model.fit_generator(
            train_generator,
            steps_per_epoch=100,
            epochs=20,
            validation_data=validation_generator,
            validation_steps=50,
            verbose=1,
            # callbacks=[TrainValTensorBoard("logs/{}".format(time()), write_graph=True)]
        )

        test_loss, test_acc = model.evaluate_generator(test_generator, steps=100)
        print('\ntest accuracy:', test_acc)
        print('test loss:', test_loss)

        curves = history.history
        # Older Keras releases log the metric as 'acc', newer ones as
        # 'accuracy'; accept either instead of failing with KeyError.
        acc_key = 'acc' if 'acc' in curves else 'accuracy'
        acc = curves[acc_key]
        val_acc = curves['val_' + acc_key]
        loss = curves['loss']
        val_loss = curves['val_loss']
        epochs = range(len(acc))

        plt.plot(epochs, acc, 'bo', label='Training acc')
        plt.plot(epochs, val_acc, 'g-', label='Validation acc')
        plt.xlabel("Num of Epochs")
        plt.ylabel("Accuracy")
        plt.title('Training and validation accuracy')
        plt.legend()

        plt.figure()
        plt.plot(epochs, loss, 'bo', label='Training loss')
        plt.plot(epochs, val_loss, 'g-', label='Validation loss')
        plt.xlabel("Num of Epochs")
        plt.ylabel("Loss")
        plt.title('Training and validation loss')
        plt.legend()
        plt.show()


CNN_Dropout_Selector('relu', 'sigmoid')
Five different penalties are put on the higher parameter values: 0, 0.00001, 0.0001, 0.001, and 0.1.
def CNN_Regularizer_Selector(activation_function_1, activation_function_2):
    """Train and plot the selected CNN once per regularisation penalty.

    The architecture is fixed (16 and 32 feature maps, a 16-unit dense layer,
    a single-unit output, dropout rate 0) and trained for 20 epochs with
    Adadelta. Both dense layers receive an L2 kernel regulariser and an L1
    activity regulariser with the same penalty, taken in turn from
    0, 0.00001, 0.0001, 0.001 and 0.1. Data come from the module-level
    ``train_generator``, ``validation_generator`` and ``test_generator``.

    Args:
        activation_function_1: Activation of the convolutional layers and the
            first dense layer.
        activation_function_2: Activation of the output layer.

    Returns:
        None. Test metrics and curves are printed / plotted.
    """
    K.clear_session()
    Adadelta = optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
    penalties = [0, 0.00001, 0.0001, 0.001, 0.1]

    for penalty in penalties:
        # Each model is built, trained and reported inside the iteration;
        # the original kept all of them in a list that was never read.
        model = models.Sequential()
        model.add(layers.Conv2D(16, (3, 3), padding='same', activation=activation_function_1,
                                input_shape=train_data_batch.shape[1:], name='conv1'))
        model.add(layers.MaxPooling2D((2, 2), name='max_pool1'))
        model.add(layers.Conv2D(32, (3, 3), padding='same', activation=activation_function_1,
                                name='conv2'))
        model.add(layers.MaxPooling2D((2, 2), name='max_pool2'))
        model.add(layers.Flatten())
        model.add(layers.Dense(16, kernel_initializer='glorot_uniform',
                               activation=activation_function_1,
                               kernel_regularizer=keras.regularizers.l2(penalty),
                               activity_regularizer=keras.regularizers.l1(penalty),
                               name='fc1'))
        model.add(layers.Dropout(0, noise_shape=None, seed=None))
        model.add(layers.Dense(1, kernel_initializer='glorot_uniform',
                               activation=activation_function_2,
                               kernel_regularizer=keras.regularizers.l2(penalty),
                               activity_regularizer=keras.regularizers.l1(penalty),
                               name='fc2'))
        model.add(layers.Dropout(0, noise_shape=None, seed=None))
        model.compile(loss='binary_crossentropy', optimizer=Adadelta, metrics=['accuracy'])
        print('\n Regulation Penalty:', penalty)

        # Fit the model
        history = model.fit_generator(
            train_generator,
            steps_per_epoch=100,
            epochs=20,
            validation_data=validation_generator,
            validation_steps=50,
            verbose=1,
            # callbacks=[TrainValTensorBoard("logs/{}".format(time()), write_graph=True)]
        )

        test_loss, test_acc = model.evaluate_generator(test_generator, steps=100)
        print('\ntest accuracy:', test_acc)
        print('test loss:', test_loss)

        curves = history.history
        # Older Keras releases log the metric as 'acc', newer ones as
        # 'accuracy'; accept either instead of failing with KeyError.
        acc_key = 'acc' if 'acc' in curves else 'accuracy'
        acc = curves[acc_key]
        val_acc = curves['val_' + acc_key]
        loss = curves['loss']
        val_loss = curves['val_loss']
        epochs = range(len(acc))

        plt.plot(epochs, acc, 'bo', label='Training acc')
        plt.plot(epochs, val_acc, 'g-', label='Validation acc')
        plt.xlabel("Num of Epochs")
        plt.ylabel("Accuracy")
        plt.title('Training and validation accuracy')
        plt.legend()

        plt.figure()
        plt.plot(epochs, loss, 'bo', label='Training loss')
        plt.plot(epochs, val_loss, 'g-', label='Validation loss')
        plt.xlabel("Num of Epochs")
        plt.ylabel("Loss")
        plt.title('Training and validation loss')
        plt.legend()
        plt.show()


CNN_Regularizer_Selector('relu', 'sigmoid')
Based on the accuracy and loss of training, validation, and test sets, the best optimizer for our image dataset is Adadelta, the best rate for dropout is 0, and the best penalty for our image dataset is 0.00001.
Here, we will explore the optimal values for several parameters for our CNN: Batch size, Learning Rate, Adadelta Decay Factor, and Initial Learning Rate Decay. We will try different batch sizes: 16, 32, 64, 128, and 256, different learning rates for the Adadelta optimizer: 0.1, 0.5, 1, 5, and 10, and different decay factors for the Adadelta optimizer: 0, 0.1, 0.5, 0.9, and 1.
Here, the accuracy and loss of training, validation, and test sets obtained from different architectures will be used to choose the optimal architecture. The final values of accuracy and loss and how the accuracy and loss change with epochs will be compared to determine the optimal values.
The optimal CNN model for the image dataset obtained in Part 1 and Part 2 will be used in this Part 3. We will try different batch sizes: 16, 32, 64, 128, and 256.
def CNN_Parameter_Tuning_1(activation_function_1, activation_function_2):
    """Sweep the training batch size and report accuracy/loss for each value.

    For every batch size in 16, 32, 64, 128 and 256 this builds a fresh
    two-convolution CNN, trains it for 20 epochs on a newly created training
    generator, evaluates it on the test generator, prints the test metrics and
    plots the training/validation accuracy and loss curves.

    Relies on names defined elsewhere in the notebook: ``train_datagen``,
    ``train_dir``, ``validation_generator`` and ``test_generator``.

    Args:
        activation_function_1: Activation used by both convolution layers and
            by the hidden dense layer ``fc1``.
        activation_function_2: Activation used by the single-unit output
            layer ``fc2``.

    Returns:
        None. Results are printed and plotted.
    """
    K.clear_session()
    batches = [2 ** (j + 4) for j in range(5)]  # 16, 32, 64, 128, 256
    for batch_size in batches:
        # Build one optimizer per model (as the sibling sweeps do) so that no
        # optimizer object is shared between independently trained models.
        optimizer = optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)
        train_generator_new = train_datagen.flow_from_directory(
            train_dir,
            target_size=(64, 64),
            batch_size=batch_size,
            class_mode='binary')
        # Pull a single batch only to report its shape and to size the
        # network's input layer.
        train_data_batch, train_labels_batch = next(train_generator_new)
        print('data batch shape:', train_data_batch.shape)
        print('labels batch shape:', train_labels_batch.shape)

        net = models.Sequential()
        net.add(layers.Conv2D(16, (3, 3), padding='same', activation=activation_function_1,
                              input_shape=train_data_batch.shape[1:], name='conv1'))
        net.add(layers.MaxPooling2D((2, 2), name='max_pool1'))
        net.add(layers.Conv2D(32, (3, 3), padding='same', activation=activation_function_1, name='conv2'))
        net.add(layers.MaxPooling2D((2, 2), name='max_pool2'))
        net.add(layers.Flatten())
        net.add(layers.Dense(16, kernel_initializer='glorot_uniform', activation=activation_function_1,
                             kernel_regularizer=keras.regularizers.l2(0.00001),
                             activity_regularizer=keras.regularizers.l1(0.00001), name='fc1'))
        net.add(layers.Dropout(0, noise_shape=None, seed=None))
        net.add(layers.Dense(1, kernel_initializer='glorot_uniform', activation=activation_function_2,
                             kernel_regularizer=keras.regularizers.l2(0.00001),
                             activity_regularizer=keras.regularizers.l1(0.00001), name='fc2'))
        net.add(layers.Dropout(0, noise_shape=None, seed=None))
        net.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

        # Fit the model
        history = net.fit_generator(
            train_generator_new,
            steps_per_epoch=100,
            epochs=20,
            validation_data=validation_generator,
            validation_steps=50,
            verbose=1,
        )
        test_loss, test_acc = net.evaluate_generator(test_generator, steps=100)
        # The label previously started with '\b' (a backspace escape), which
        # garbled the printed text; '\n' matches the line below.
        print('\nbatch_size:', batch_size)
        print('\ntest accuracy:', test_acc)
        print('test loss:', test_loss)

        acc = history.history['acc']
        val_acc = history.history['val_acc']
        loss = history.history['loss']
        val_loss = history.history['val_loss']
        epochs = range(len(acc))

        plt.plot(epochs, acc, 'bo', label='Training acc')
        plt.plot(epochs, val_acc, 'g-', label='Validation acc')
        plt.xlabel("Num of Epochs")
        plt.ylabel("Accuracy")
        plt.title('Training and validation accuracy')
        plt.legend()

        # Loss goes on its own figure so it does not share axes with accuracy.
        plt.figure()
        plt.plot(epochs, loss, 'bo', label='Training loss')
        plt.plot(epochs, val_loss, 'g-', label='Validation loss')
        plt.xlabel("Num of Epochs")
        plt.ylabel("Loss")
        plt.title('Training and validation loss')
        plt.legend()
        plt.show()
# Run the batch-size sweep with 'relu' for the convolution/hidden layers and
# 'sigmoid' for the output layer.
CNN_Parameter_Tuning_1('relu', 'sigmoid')
The optimal CNN model for the image dataset obtained in Part 1 and Part 2 will be used in this Part 3. We will try different learning rates: 0.1, 0.5, 1, 5, and 10.
def CNN_Parameter_Tuning_2(activation_function_1, activation_function_2):
    """Sweep the Adadelta learning rate and report accuracy/loss for each value.

    A training generator with batch size 16 is created once. Then, for every
    learning rate in 0.1, 0.5, 1, 5 and 10, a fresh two-convolution CNN is
    built, trained for 20 epochs, evaluated on the test generator, and its
    training/validation accuracy and loss curves are plotted.

    Relies on names defined elsewhere in the notebook: ``train_datagen``,
    ``train_dir``, ``validation_generator`` and ``test_generator``.

    Args:
        activation_function_1: Activation used by both convolution layers and
            by the hidden dense layer ``fc1``.
        activation_function_2: Activation used by the single-unit output
            layer ``fc2``.

    Returns:
        None. Results are printed and plotted.
    """
    K.clear_session()
    batch_size = 16
    train_generator_new = train_datagen.flow_from_directory(
        train_dir,
        target_size=(64, 64),
        batch_size=batch_size,
        class_mode='binary')
    # Size the input layer from this function's own generator instead of a
    # `train_data_batch` variable left over from another notebook cell, which
    # may be undefined or describe a different image size.
    input_shape = next(train_generator_new)[0].shape[1:]

    learning_rate = [0.1, 0.5, 1, 5, 10]
    for rate in learning_rate:
        optimizer = optimizers.Adadelta(lr=rate, rho=0.95, epsilon=None, decay=0.0)

        net = models.Sequential()
        net.add(layers.Conv2D(16, (3, 3), padding='same', activation=activation_function_1,
                              input_shape=input_shape, name='conv1'))
        net.add(layers.MaxPooling2D((2, 2), name='max_pool1'))
        net.add(layers.Conv2D(32, (3, 3), padding='same', activation=activation_function_1, name='conv2'))
        net.add(layers.MaxPooling2D((2, 2), name='max_pool2'))
        net.add(layers.Flatten())
        net.add(layers.Dense(16, kernel_initializer='glorot_uniform', activation=activation_function_1,
                             kernel_regularizer=keras.regularizers.l2(0.00001),
                             activity_regularizer=keras.regularizers.l1(0.00001), name='fc1'))
        net.add(layers.Dropout(0, noise_shape=None, seed=None))
        net.add(layers.Dense(1, kernel_initializer='glorot_uniform', activation=activation_function_2,
                             kernel_regularizer=keras.regularizers.l2(0.00001),
                             activity_regularizer=keras.regularizers.l1(0.00001), name='fc2'))
        net.add(layers.Dropout(0, noise_shape=None, seed=None))
        net.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

        # Fit the model
        history = net.fit_generator(
            train_generator_new,
            steps_per_epoch=100,
            epochs=20,
            validation_data=validation_generator,
            validation_steps=50,
            verbose=1,
        )
        test_loss, test_acc = net.evaluate_generator(test_generator, steps=100)
        print('learning rate:', rate)
        print('\ntest accuracy:', test_acc)
        print('test loss:', test_loss)

        acc = history.history['acc']
        val_acc = history.history['val_acc']
        loss = history.history['loss']
        val_loss = history.history['val_loss']
        epochs = range(len(acc))

        plt.plot(epochs, acc, 'bo', label='Training acc')
        plt.plot(epochs, val_acc, 'g-', label='Validation acc')
        plt.xlabel("Num of Epochs")
        plt.ylabel("Accuracy")
        plt.title('Training and validation accuracy')
        plt.legend()

        # Loss goes on its own figure so it does not share axes with accuracy.
        plt.figure()
        plt.plot(epochs, loss, 'bo', label='Training loss')
        plt.plot(epochs, val_loss, 'g-', label='Validation loss')
        plt.xlabel("Num of Epochs")
        plt.ylabel("Loss")
        plt.title('Training and validation loss')
        plt.legend()
        plt.show()
# Run the learning-rate sweep with 'relu' for the convolution/hidden layers
# and 'sigmoid' for the output layer.
CNN_Parameter_Tuning_2('relu', 'sigmoid')
The optimal CNN model for the image dataset obtained in Part 1 and Part 2 will be used. We will try different decay factors (i.e., the fraction of gradient to keep at each time step), including 0, 0.1, 0.5, 0.9, and 1.
def CNN_Parameter_Tuning_3(activation_function_1, activation_function_2):
    """Sweep the Adadelta decay factor (rho) and report accuracy/loss for each.

    For every decay factor in 0, 0.1, 0.5, 0.9 and 1, a fresh two-convolution
    CNN is built and compiled with Adadelta at learning rate 0.1, trained for
    20 epochs on the notebook-level ``train_generator``, evaluated on the test
    generator, and its training/validation accuracy and loss curves are
    plotted.

    Relies on names defined elsewhere in the notebook: ``train_generator``,
    ``validation_generator`` and ``test_generator``.

    Args:
        activation_function_1: Activation used by both convolution layers and
            by the hidden dense layer ``fc1``.
        activation_function_2: Activation used by the single-unit output
            layer ``fc2``.

    Returns:
        None. Results are printed and plotted.
    """
    K.clear_session()
    # Size the input layer from the generator that is actually trained on,
    # instead of a `train_data_batch` variable left over from another notebook
    # cell, which may be undefined or describe a different image size.
    # NOTE(review): `train_generator` is created outside this function, so its
    # batch size is not visible here -- confirm it is the intended 16.
    input_shape = next(train_generator)[0].shape[1:]

    # NOTE(review): rho=0 and rho=1 are edge cases of the running-average
    # decay; presumably rho=1 never updates the averages -- confirm against
    # the optimizer documentation before interpreting those two runs.
    decay_factor = [0, 0.1, 0.5, 0.9, 1]
    for rho in decay_factor:
        optimizer = optimizers.Adadelta(lr=0.1, rho=rho, epsilon=None, decay=0.0)

        net = models.Sequential()
        net.add(layers.Conv2D(16, (3, 3), padding='same', activation=activation_function_1,
                              input_shape=input_shape, name='conv1'))
        net.add(layers.MaxPooling2D((2, 2), name='max_pool1'))
        net.add(layers.Conv2D(32, (3, 3), padding='same', activation=activation_function_1, name='conv2'))
        net.add(layers.MaxPooling2D((2, 2), name='max_pool2'))
        net.add(layers.Flatten())
        net.add(layers.Dense(16, kernel_initializer='glorot_uniform', activation=activation_function_1,
                             kernel_regularizer=keras.regularizers.l2(0.00001),
                             activity_regularizer=keras.regularizers.l1(0.00001), name='fc1'))
        net.add(layers.Dropout(0, noise_shape=None, seed=None))
        net.add(layers.Dense(1, kernel_initializer='glorot_uniform', activation=activation_function_2,
                             kernel_regularizer=keras.regularizers.l2(0.00001),
                             activity_regularizer=keras.regularizers.l1(0.00001), name='fc2'))
        net.add(layers.Dropout(0, noise_shape=None, seed=None))
        net.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

        # Fit the model
        history = net.fit_generator(
            train_generator,
            steps_per_epoch=100,
            epochs=20,
            validation_data=validation_generator,
            validation_steps=50,
            verbose=1,
        )
        test_loss, test_acc = net.evaluate_generator(test_generator, steps=100)
        print('decay factor:', rho)
        print('\ntest accuracy:', test_acc)
        print('test loss:', test_loss)

        acc = history.history['acc']
        val_acc = history.history['val_acc']
        loss = history.history['loss']
        val_loss = history.history['val_loss']
        epochs = range(len(acc))

        plt.plot(epochs, acc, 'bo', label='Training acc')
        plt.plot(epochs, val_acc, 'g-', label='Validation acc')
        plt.xlabel("Num of Epochs")
        plt.ylabel("Accuracy")
        plt.title('Training and validation accuracy')
        plt.legend()

        # Loss goes on its own figure so it does not share axes with accuracy.
        plt.figure()
        plt.plot(epochs, loss, 'bo', label='Training loss')
        plt.plot(epochs, val_loss, 'g-', label='Validation loss')
        plt.xlabel("Num of Epochs")
        plt.ylabel("Loss")
        plt.title('Training and validation loss')
        plt.legend()
        plt.show()
# Run the decay-factor sweep with 'relu' for the convolution/hidden layers
# and 'sigmoid' for the output layer.
CNN_Parameter_Tuning_3('relu', 'sigmoid')
Optimal batch size: based on the accuracy and loss on the training, validation, and test sets, we found that the optimal batch size is 16. Increasing the batch size does not appear to improve the results, so it is not worth the additional computational cost. In addition, the optimal learning rate for the Adadelta optimizer is 0.1, and the optimal decay factor for the Adadelta optimizer is 1.
# Train the final model with the selected settings (batch size 16, Adadelta
# with lr=0.1 and rho=1) and draw its ROC curve on the test set.
K.clear_session()

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(64, 64),
    batch_size=16,
    class_mode='binary',
)

# Fetch one batch to report its shape and to size the network's input layer.
train_data_batch, train_labels_batch = next(train_generator)
print('data batch shape:', train_data_batch.shape)
print('labels batch shape:', train_labels_batch.shape)

Adadelta = optimizers.Adadelta(lr=0.1, rho=1, epsilon=None, decay=0.0)

# Same architecture as the tuning sweeps: two conv/pool stages followed by a
# 16-unit hidden layer and a single sigmoid output.
model = models.Sequential([
    layers.Conv2D(16, (3, 3), padding='same', activation='relu',
                  input_shape=train_data_batch.shape[1:], name='conv1'),
    layers.MaxPooling2D((2, 2), name='max_pool1'),
    layers.Conv2D(32, (3, 3), padding='same', activation='relu', name='conv2'),
    layers.MaxPooling2D((2, 2), name='max_pool2'),
    layers.Flatten(),
    layers.Dense(16, kernel_initializer='glorot_uniform', activation='relu',
                 kernel_regularizer=keras.regularizers.l2(0.00001),
                 activity_regularizer=keras.regularizers.l1(0.00001), name='fc1'),
    layers.Dropout(0, noise_shape=None, seed=None),
    layers.Dense(1, kernel_initializer='glorot_uniform', activation='sigmoid',
                 kernel_regularizer=keras.regularizers.l2(0.00001),
                 activity_regularizer=keras.regularizers.l1(0.00001), name='fc2'),
    layers.Dropout(0, noise_shape=None, seed=None),
])
model.compile(loss='binary_crossentropy', optimizer=Adadelta, metrics=['accuracy'])

# Fit the model
history = model.fit_generator(
    train_generator,
    steps_per_epoch=100,
    epochs=40,
    validation_data=validation_generator,
    validation_steps=50,
    verbose=1,
)

prediction = model.predict_generator(test_generator, steps=len(test_generator))

# Flatten the (n, 1) prediction rows into scores, then threshold them at 0.5
# to obtain hard 0/1 labels in "prediction_list".
new_prediction = [row[0] for row in prediction]
prediction_list = [1 if score >= 0.5 else 0 for score in new_prediction]

# Collect all of the labels in the test set into a list "test_value_list".
# NOTE(review): this assumes test_generator yields batches in the same order
# here as during predict_generator (i.e. it was created with shuffle=False)
# -- confirm where test_generator is defined.
test_value_list = [
    label
    for batch_idx in range(len(test_generator))
    for label in test_generator[batch_idx][1].tolist()
]

# Plot the ROC curve and report its AUC
fpr, tpr, threshold = metrics.roc_curve(test_value_list, new_prediction)
roc_auc = metrics.auc(fpr, tpr)

plt.title('Receiver Operating Characteristic')
plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], 'r--')  # chance-level diagonal
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()
The ROC curve for the best model, evaluated on the test data, is shown above. The AUC is 0.97, which indicates that the model separates the two classes well.
# Record the test-set indices of every wrong prediction, split by the
# direction of the error.
misclassified_portrait = []
misclassified_landscape = []
for sample_idx, predicted_label in enumerate(prediction_list):
    if test_value_list[sample_idx] == predicted_label:
        continue  # correctly classified
    if predicted_label == 0:
        # Predicted class 0 although the true label differs:
        # a portrait misclassified as a landscape.
        misclassified_portrait.append(sample_idx)
    else:
        # A landscape misclassified as a portrait.
        misclassified_landscape.append(sample_idx)

print('The number of portrait misclassified as landsapce', len(misclassified_portrait))
print('The number of landscape misclassified as portrait', len(misclassified_landscape))